# All installed under v3.6, and v4.2.2 with exception of mapproj, gdsfmt, SNPRelate
library(tidyverse); theme_set(theme_classic())
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fs)
library(ggpubr)
library(ggbeeswarm)
library(ggrepel)
library(cowplot); theme_set(theme_cowplot(font_size=6))
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggpubr':
##
## get_legend
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggtree)
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## ggtree v3.6.2 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
##
## Shuangbin Xu, Lin Li, Xiao Luo, Meijun Chen, Wenli Tang, Li Zhan, Zehan
## Dai, Tommy T. Lam, Yi Guan, Guangchuang Yu. Ggtree: A serialized data
## object for visualization of a phylogenetic tree and annotation data.
## iMeta 2022, 1(4):e56. doi:10.1002/imt2.56
##
## Attaching package: 'ggtree'
##
## The following object is masked from 'package:ggpubr':
##
## rotate
##
## The following object is masked from 'package:tidyr':
##
## expand
library(ape)
##
## Attaching package: 'ape'
##
## The following object is masked from 'package:ggtree':
##
## rotate
##
## The following object is masked from 'package:ggpubr':
##
## rotate
##
## The following object is masked from 'package:dplyr':
##
## where
library(patchwork)
##
## Attaching package: 'patchwork'
##
## The following object is masked from 'package:cowplot':
##
## align_plots
# File paths for gasAcu5 masked aligned reads
ga5_roi_file <- "/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/gasAcuv5_C4masked_nochrY/06_samtools_coverage_roi/roi_coverage_mapq3.txt"
metadata_file <- "/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/metadata_227genomes.txt"
ga5_sim_roi_file <- "/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/depth_simulations/206_genomes/02_coverage/stickleback_v5_assembly_MYH3C4dup_hardmasked_noChrY/roi_coverage_mapq3.txt"
# Output file paths
out_table_file <- "/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/gasAcuv5_C4masked_nochrY/ecotypic_depth_samples.txt"
# Read roi file for gasAcu5 masked aligned reads
ga5_roi <- read_tsv(ga5_roi_file) %>%
mutate(bam = str_remove_all(bam, ".recal.realignGA5_C4masked.sort.merged.mkdup.bam|.recal.realignGA5_C4masked.sort.mkdup.bam|.recal.realignGA5_C4masked.sort.mkdup_mapq3_roi_coverage.txt|05_mkdup_index/"),
type = "real") %>%
rename(samp = bam)
## Rows: 1816 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): desc, region, chr, bam
## dbl (8): startpos, endpos, numreads, covbases, coverage, meandepth, meanbase...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ga5_roi
# Read simulations roi file for gasAcu5 masked aligned reads
ga5_sim_roi <- read_tsv(ga5_sim_roi_file) %>%
mutate(bam = str_remove_all(bam, ".RG.sorted.bam|/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/depth_simulations/206_genomes/01_alignment/stickleback_v5_assembly_MYH3C4dup_hardmasked_noChrY/"),
type = "sim") %>%
rename(samp = bam)
## Rows: 88 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): desc, region, chr, bam
## dbl (8): startpos, endpos, numreads, covbases, coverage, meandepth, meanbase...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ga5_sim_roi
# Combine roi tables from real and sim samples
ga5_comb_roi <- rbind(ga5_roi, ga5_sim_roi)
ga5_comb_roi %>%
filter(type=="sim")
# Read whole genome files for gasAcu5 masked aligned reads
ga5_wg_files <- dir_ls(path = "/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/gasAcuv5_C4masked_nochrY/06_samtools_coverage_wg/", glob = "*mapq3_wg_coverage.txt")
ga5_sim_wg_files <- dir_ls(path = "/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/depth_simulations/206_genomes/02_coverage/stickleback_v5_assembly_MYH3C4dup_hardmasked_noChrY/", glob = "*mapq3_wg_coverage.txt")
# function to add file name to dataframe
read_and_record_filename <- function(filename){
read_tsv(filename) %>%
mutate(filename = path_file(filename))
}
# gather real wg files into dataframe
ga5_wg <- map_df(ga5_wg_files, read_and_record_filename)%>%
mutate(samp = str_remove_all(filename, ".recal.realignGA5_C4masked.sort.merged.mkdup_mapq3_wg_coverage.txt|.recal.realignGA5_C4masked.sort.mkdup_mapq3_wg_coverage.txt|../ecotypic_depth/gasAcuv5_C4masked_nochrY/06_samtools_coverage_wg"))
ga5_wg
# Gather simulated wg files into dataframe
ga5_sim_wg <- map_df(ga5_sim_wg_files, read_and_record_filename)%>%
mutate(samp = str_remove_all(filename, ".mapq3_wg_coverage.txt"))
ga5_sim_wg
# combine real and simulated data into one dataframe
ga5_comb_wg <- rbind(ga5_wg, ga5_sim_wg)
ga5_comb_wg
# Add metadata
metadata <- read_tsv(metadata_file)
## Rows: 227 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): seq_ID, pop, coord_approx, mar_fresh, notes, water_type, used_joint...
## dbl (7): GPS_north, GPS_east, PNW_independent_MvsF_c150, NorthEurope_indepen...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata
Is the depth consistent across chromosomes?
# Plot coverage by chromosome
ga5_wg %>%
filter(!chr %in% c("chrM", "chrUn")) %>%
ggplot(aes(chr, meandepth)) +
geom_point() +
coord_flip() +
facet_wrap(~samp)
Samples that look like they are actually males:
# Plot coverage by chromosome for two normal samples and the samples that look like males
ga5_wg %>%
filter(!chr %in% c("chrM", "chrUn"),
samp %in% c("AKMA_X_2001_102", "AKST_X_2001_03", "BIGR_1_32_2007_02", "BK70_X_2010_02", "BNST_X_2006_10", "LAUR_X_1993_9_5", "MIDF_REND_2011_05", "MIDF_REND_2011_06", "NTRW_X_X_02", "REIV_X_2011_04")) %>%
ggplot(aes(chr, meandepth)) +
geom_point() +
coord_flip() +
facet_wrap(~samp, nrow=1)
# get whole genome mean depth for gasAcu5 masked aligned (excluding chrUn, chrM, chrY) per sample
ga5_wg_cov <- ga5_comb_wg %>%
filter(!chr %in% c("chrUn", "chrM", "chrY")) %>%
group_by(samp) %>%
summarize(weighted_mean_depth = weighted.mean(meandepth, endpos))
ga5_wg_cov
# Add additional depth and metadata info to rois
ga5_roi_cov <- left_join(ga5_comb_roi, ga5_wg_cov) %>%
left_join(., metadata, by = c("samp" = "seq_ID")) %>%
mutate(wg_norm_depth = meandepth / weighted_mean_depth,
ecotype = case_when(mar_fresh == "M" ~ "Marine",
mar_fresh == "F" ~ "Freshwater",
type == "sim" ~ "Simulation"),
ecotype = as.factor(ecotype),
ecotype = relevel(ecotype, "Marine"),
sex = case_when(samp %in% c("BIGR_1_32_2007_02", "BK70_X_2010_02", "BNST_X_2006_10", "LAUR_X_1993_9_5", "MIDF_REND_2011_05", "MIDF_REND_2011_06", "NTRW_X_X_02", "REIV_X_2011_04", "rabs_THREE_spine.male.fa_sim4X.rep0", "stickleback_v5-BOULxBDGB_F5-4copy.male.fa_sim4X.rep0", "stickleback_v5-BEPA-AF_ONT_M1020-5copy.male.fa_sim4X.rep0", "stickleback_v5-BLAU22_12-6copy.male.fa_sim4X.rep0") ~ "male",
TRUE ~ "female"),
samp_length = str_length(samp)) %>%
separate(samp, into = "acronym", remove=FALSE, sep="_")
## Joining with `by = join_by(samp)`
## Warning: Expected 1 pieces. Additional pieces discarded in 1752 rows [1, 2, 3, 4, 5, 6,
## 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# Get average from simulations
sim_average <- ga5_roi_cov %>%
filter(type=="sim", desc=="MYH3C3") %>%
mutate(CN = case_when(str_detect(samp, "3copy") ~ "3-copy",
str_detect(samp, "4copy") ~ "4-copy",
str_detect(samp, "5copy") ~ "5-copy",
str_detect(samp, "6copy") ~ "6-copy")) %>%
group_by(CN, sex) %>%
summarise(wg_norm_depth = mean(wg_norm_depth))
## `summarise()` has grouped output by 'CN'. You can override using the `.groups`
## argument.
sim_average
# Plot MYH3C3 read depth for balanced global selection, excluding low depth original Jones et al. 2012 samples
# Filter samples
ga5_roi_cov_filt <- ga5_roi_cov %>%
filter(desc == "MYH3C3", type=="real", sex=="female", NorthEurope_independent_MvsF_c151 %in% c(0,1)|PNW_independent_MvsF_c150 %in% c(0,1)|CaliforniaFreshwater_vs_AllPacificMarine_c153 %in% c(0,1), samp_length > 6) %>%
mutate(Individual = case_when(samp == "BIGR_1_32_2007_03" ~ "BIGR_M",
samp == "BIGR_52_54_2008_02" ~ "BIGR_F",
samp == "LITC_0_05_2008_841" ~ "LITC_M",
samp == "LITC_23_32_2008_306" ~ "LITC_F",
samp == "MIDF_BLUP_2011_01" ~ "MIDF_M",
samp == "MIDF_REND_2011_01" ~ "MIDF_F",
samp == "TYNE_1_2001_14" ~ "TYNE_M",
samp == "TYNE_8_2003_902" ~ "TYNE_F",
TRUE ~ acronym))
# Wilcoxon test for marine vs freshwater
w <- wilcox.test(subset(ga5_roi_cov_filt, ecotype == "Freshwater")$wg_norm_depth, subset(ga5_roi_cov_filt, ecotype == "Marine")$wg_norm_depth, alternative = "two.sided")
w
##
## Wilcoxon rank sum test with continuity correction
##
## data: subset(ga5_roi_cov_filt, ecotype == "Freshwater")$wg_norm_depth and subset(ga5_roi_cov_filt, ecotype == "Marine")$wg_norm_depth
## W = 1211, p-value = 9.834e-08
## alternative hypothesis: true location shift is not equal to 0
# Make plot
C3_global <- ga5_roi_cov_filt %>%
ggplot(aes(fct_reorder(Individual, wg_norm_depth), wg_norm_depth, color = ecotype)) +
geom_hline(data = sim_average, aes(yintercept = wg_norm_depth), linetype = "dashed", color = "grey", linewidth=0.25) +
geom_segment( aes(x=Individual, xend=Individual, y=0, yend=wg_norm_depth)) +
geom_point(size=1.5) +
scale_color_manual(values=c("#d73027","#0072b2"), name = "Ecotype") +
# scale_shape_manual(values = c(21,19)) +
scale_y_continuous(limits = c(0,3.7), expand = expansion(mult = c(0, 0))) +
theme_cowplot(6) +
panel_border(color="black", size=0.75) +
theme(axis.text.x=element_text(angle = -90, hjust = 0),
legend.position = c(0.02, 0.8),
legend.box.background = element_rect(color="black", linewidth=0.25, fill = "white"),
legend.box.margin = margin(4, 4, 4, 4)) +
xlab("Individual") +
ylab("MYH3C3 normalized depth")
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# ylim(0,3.5)
C3_global
# Plot read depth and lattitude correlation
lat <- ga5_roi_cov %>%
filter(desc == "MYH3C3", type=="real", sex=="female", NorthEurope_independent_MvsF_c151 %in% c(0,1)|PNW_independent_MvsF_c150 %in% c(0,1)|CaliforniaFreshwater_vs_AllPacificMarine_c153 %in% c(0,1), samp_length > 6) %>%
mutate(ocean = case_when(PNW_independent_MvsF_c150 %in% c(0,1)|CaliforniaFreshwater_vs_AllPacificMarine_c153 %in% c(0,1) ~ "Pacific",
NorthEurope_independent_MvsF_c151 %in% c(0,1) ~ "Atlantic")) %>%
ggplot(aes(GPS_north, wg_norm_depth, color = ecotype)) +
geom_hline(data = sim_average, aes(yintercept = wg_norm_depth), linetype = "dashed", color = "grey", linewidth=0.25) +
geom_point(alpha=0.7, size=1.5) +
geom_smooth(aes(fill = ecotype), method = "lm") +
stat_cor(aes(color = ecotype), size=2, method = "pearson", cor.coef.name = "R") +
# stat_cor(aes(color = ecotype), size=2, method = "spearman", cor.coef.name = "rho") +
scale_color_manual(values=c("#d73027","#0072b2"),) +
scale_fill_manual(values=c("#d73027","#0072b2")) +
scale_y_continuous(limits = c(0,3.7), expand = expansion(mult = c(0, 0))) +
facet_wrap(~factor(ocean, levels=c("Pacific", "Atlantic")), scales = "free_x") +
theme_cowplot(6) +
panel_border(color="black", size=0.75) +
theme(legend.position = "none") +
xlab("Latitude (°N)") +
ylab("MYH3C3 normalized depth")
lat
## `geom_smooth()` using formula = 'y ~ x'
# Plot hybrid zones
hz_plot <- ga5_roi_cov %>%
filter(desc == "MYH3C3", sex == "female", !is.na(`used _river_comparisons`), samp_length > 6) %>%
mutate(location = as.factor(case_when(acronym == "BIGR" ~ "Big River,\nCA, USA",
acronym %in% c("BNMA", "BNST") ~ "Bonsall River,\nBC, Canada",
acronym == "LITC" ~ "Little Campbell River,\nBC, Canada",
acronym == "TYNE" ~ "Tyne River,\nScotland, UK",
acronym == "MIDF" ~ "Midfjardara River,\nIceland")),
location = fct_relevel(location, "Big River,\nCA, USA", "Little Campbell River,\nBC, Canada", "Bonsall River,\nBC, Canada", "Tyne River,\nScotland, UK", "Midfjardara River,\nIceland")) %>%
ggplot(aes(location, wg_norm_depth, color = ecotype)) +
geom_hline(data = sim_average, aes(yintercept = wg_norm_depth), linetype = "dashed", color = "grey", linewidth=0.25) +
geom_beeswarm(cex=2, dodge.width = .7, alpha = 0.7) +
geom_boxplot(fill = NA, outlier.shape = NA) +
scale_color_manual(values=c("#d73027","#0072b2")) +
scale_y_continuous(limits = c(0,3.7), expand = expansion(mult = c(0, 0))) +
# ylim(0,2.5) +
ylab("MYH3C3 normalized depth") +
theme_cowplot(6) +
panel_border(color="black", size=0.75) +
theme(legend.position = "none",
axis.title.x=element_blank()) +
stat_compare_means(aes(group = ecotype), label.x = 1, label.y = 3.3, size = 3, label="p.signif")
hz_plot
# Arrange plots using cowplot
# align all plots vertically
plots <- cowplot::align_plots(C3_global, lat, hz_plot, align = 'v', axis = 'l')
## `geom_smooth()` using formula = 'y ~ x'
top_row <- plot_grid(
plots[[1]],
labels = c("B"),
label_size = 12,
nrow = 1
)
bottom_row <- plot_grid(
plots[[2]], plots[[3]],
labels = c("C", "D"),
rel_widths = c(1, 1),
label_size = 12,
nrow = 1
)
final_plot <- plot_grid(top_row, bottom_row, ncol = 1, rel_heights = c(1.1, 1), label_size = 12)
final_plot
# Save figure to separate file
ggsave(
"/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/gasAcuv5_C4masked_nochrY/ecotypic_depth_figure.pdf",
plot = final_plot,
scale = 1,
width = 6.5,
height = 3.75,
units = c("in"),
dpi = 300,
)
# Plot MYH3C3 read depth for locations with multiple samples
ga5_roi_cov
multi_samp <- ga5_roi_cov %>%
filter(desc == "MYH3C3",
sex=="female",
type=="real",
samp_length > 6) %>%
group_by(pop) %>%
mutate(pop = case_when(acronym == "BOUL" ~ "Boulton Lake, Haida Gwaii, BC, Canada",
pop == "Drizzle Lake, Haida Gwaii" ~ "Drizzle Lake, Haida Gwaii, BC, Canada",
pop == "Friant River" ~ "Friant River, CA, USA",
acronym == "GOLD" ~ "Gold Creek, Haida Gwaii, BC, Canada",
acronym == "MAYR" ~ "Mayer Lake, Haida Gwaii, BC, Canada",
acronym == "RDSP" ~ "Roadside Pond, Haida Gwaii, BC, Canada",
pop == "Spence Lake, Haida Gwaii" ~ "Spence Lake, Haida Gwaii, BC, Canada",
pop == "Spence Lake Outlet, Haida Gwaii" ~ "Spence Lake Outlet, Haida Gwaii, BC, Canada",
pop == "Vifissta_avatn" ~ "Vifisstaðavatn Lake, Iceland",
pop == "Wallace Lake" ~ "Wallace Lake, AK, USA",
TRUE ~ pop)) %>%
mutate(pop_total= n()) %>%
filter(pop_total > 1, is.na(`used _river_comparisons`), acronym != "LOBG") %>%
mutate(pop = factor(pop, levels = c("Friant River, CA, USA",
"Boulton Lake, Haida Gwaii, BC, Canada",
"Spence Lake, Haida Gwaii, BC, Canada",
"Spence Lake Outlet, Haida Gwaii, BC, Canada",
"Drizzle Lake, Haida Gwaii, BC, Canada",
"Gold Creek, Haida Gwaii, BC, Canada",
"Mayer Lake, Haida Gwaii, BC, Canada",
"Roadside Pond, Haida Gwaii, BC, Canada",
"Wallace Lake, AK, USA",
"Vifisstaðavatn Lake, Iceland")))
within_pop_plot <- multi_samp %>%
ggplot(aes(pop, wg_norm_depth, color = ecotype)) +
geom_hline(data = sim_average, aes(yintercept = wg_norm_depth), linetype = "dashed", color = "grey") +
geom_beeswarm(size = 1.5, cex=2.5,alpha = 0.7) +
geom_boxplot(fill = NA, outlier.shape = NA) +
scale_color_manual(values=c("#0072b2", "#d73027")) +
scale_y_continuous(limits = c(0,3.2), expand = expansion(mult = c(0, 0))) +
xlab("Population") +
ylab("MYH3C3 normalized depth") +
theme_cowplot(8) +
panel_border(color="black", size=0.75) +
theme(legend.position = c(0.05, 0.85),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
plot.margin = margin(l = 30, r = 20))
within_pop_plot
# Save figure to separate file
ggsave(
"/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/gasAcuv5_C4masked_nochrY/within_pop_depth_figure.pdf",
plot = within_pop_plot,
scale = 1,
width = 4,
height = 4,
units = c("in"),
dpi = 300,
)
# Make table of samples used for figure
fig_table <- ga5_roi_cov %>%
# filter(desc == "MYH3C3", type=="real", sex=="female", NorthEurope_independent_MvsF_c151 %in% c(0,1)|PNW_independent_MvsF_c150 %in% c(0,1)|CaliforniaFreshwater_vs_AllPacificMarine_c153 %in% c(0,1)|!is.na(`used _river_comparisons`), samp_length > 6) %>%
filter(desc == "MYH3C3", type=="real", sex=="female", samp_length > 6) %>%
mutate(global_analysis = case_when(NorthEurope_independent_MvsF_c151 %in% c(0,1)|PNW_independent_MvsF_c150 %in% c(0,1)|CaliforniaFreshwater_vs_AllPacificMarine_c153 %in% c(0,1) ~ "yes",
TRUE ~ "no")) %>%
mutate(Figure1B_label = case_when(samp == "BIGR_1_32_2007_03" ~ "BIGR_M",
samp == "BIGR_52_54_2008_02" ~ "BIGR_F",
samp == "LITC_0_05_2008_841" ~ "LITC_M",
samp == "LITC_23_32_2008_306" ~ "LITC_F",
samp == "MIDF_BLUP_2011_01" ~ "MIDF_M",
samp == "MIDF_REND_2011_01" ~ "MIDF_F",
samp == "TYNE_1_2001_14" ~ "TYNE_M",
samp == "TYNE_8_2003_902" ~ "TYNE_F",
samp == "BNMA_X_2006_01" ~ "BNMA",
samp == "BNST_X2006_08" ~ "BNST",
!is.na(`used _river_comparisons`) ~ NA,
global_analysis == "yes" ~ acronym),
analysis_group = case_when(global_analysis == "yes" & !is.na(`used _river_comparisons`) ~ "Global comparison, Hybrid zone",
global_analysis == "yes" & samp %in% multi_samp$samp ~ "Global comparison, Within population",
global_analysis == "yes" ~ "Global comparison",
samp %in% multi_samp$samp ~ "Within population",
!is.na(`used _river_comparisons`) ~ "Hybrid zone"),
region = case_when(PNW_independent_MvsF_c150 %in% c(0,1)|CaliforniaFreshwater_vs_AllPacificMarine_c153 %in% c(0,1) ~ "Pacific",
NorthEurope_independent_MvsF_c151 %in% c(0,1) ~ "Atlantic",
str_detect(samp, "MIDF|TYNE") ~ "Atlantic",
str_detect(samp, "BNMA|BNST|BIGR|LITC") ~ "Pacific"),
wg_norm_depth = round(wg_norm_depth, digits = 3)) %>%
select(samp, Figure1B_label, pop, GPS_north, GPS_east, ecotype, water_type, wg_norm_depth, analysis_group, region, NorthEurope_independent_MvsF_c151, PNW_independent_MvsF_c150, CaliforniaFreshwater_vs_AllPacificMarine_c153)
# Write table to output file
colnames(fig_table) <- c("Sample", "Figure1B label", "Population", "GPS (North)", "GPS (East)", "Ecotype", "Water type", "MYH3C3 normalized depth", "Analysis group(s)", "Ocean basin", "NorthEurope_independent_MvsF_c151", "PNW_independent_MvsF_c150", "CaliforniaFreshwater_vs_AllPacificMarine_c153")
fig_table
fig_table %>%
write_tsv(out_table_file)
# Count n in each group for global analysis
fig_table %>%
filter(str_detect(`Analysis group(s)`, "Global")) %>%
group_by(`Ocean basin`, Ecotype) %>%
summarize(n = n())
## `summarise()` has grouped output by 'Ocean basin'. You can override using the
## `.groups` argument.
# Count n in each group for global analysis
fig_table %>%
filter(str_detect(`Analysis group(s)`, "Within")) %>%
group_by(Population) %>%
summarize(n = n())
# Count n in each group for hybrid zone analysis
fig_table %>%
filter(str_detect(`Analysis group(s)`, "Hybrid")) %>%
group_by(Population, Ecotype) %>%
summarize(n = n())
## `summarise()` has grouped output by 'Population'. You can override using the
## `.groups` argument.
# Read tree
tree <- read.tree("/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/phylogeny/subsample.eco.min4.phy.treefile")
# Extract names from tree and standardize names for join
metadata <- data.frame(tip=tree$tip.label) %>%
mutate(Sample = str_replace_all(tip, "\\|", "_")) %>%
left_join(., fig_table)
## Joining with `by = join_by(Sample)`
p_tree <- ggtree(tree) %<+% metadata +
geom_tiplab(aes(label= `Figure1B label`, color = Ecotype), size = 2, align=TRUE, linesize=.5) +
geom_text2(aes(
subset = !isTip & as.numeric(label) >= 70, label = label), hjust = 1.1, vjust = 1.2, size = 1.8) +
# geom_text2(aes(label = node), hjust = 1.1, vjust = 1.2, size = 2) +
scale_size(range = c(1, 5)) +
geom_treescale(x = 0.02, y = 95, width = 0.05, fontsize = 2) +
scale_color_manual(values = c(Freshwater="#0072b2", Marine="#d73027")) +
theme_tree() +
theme(legend.position = "none") +
xlim_tree(0.35) +
coord_cartesian(clip = "off")
# Extract tip order from the tree
tip_order <- p_tree$data %>%
dplyr::filter(isTip) %>%
dplyr::arrange(y) %>%
dplyr::pull(label)
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
metadata$tip <- factor(metadata$tip, levels = tip_order)
# Make a lollipop plot using ggplot2
p_lollipop <- ggplot(metadata, aes(y = tip, color = Ecotype)) +
geom_vline(data = sim_average, aes(xintercept = wg_norm_depth), linetype = "dashed", color = "grey", linewidth=0.25) +
geom_segment(aes(x = 0, xend = `MYH3C3 normalized depth`), linewidth = 0.6) +
geom_point(
aes(x = `MYH3C3 normalized depth`),
size = 1.5
) +
scale_color_manual(values = c(Freshwater="#0072b2", Marine="#d73027")) +
scale_x_continuous(limits = c(0,4), breaks = c(0, 2, 4), expand = expansion(mult = c(0, 0))) +
xlab("MYH3C3 normalized depth") +
theme_cowplot(8) +
panel_border(color="black", size=0.75) +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = "top"
)
# Combine tree and lollipop plot
p_final <- p_tree + plot_spacer() + p_lollipop +
plot_layout(widths = c(1.5, -0.06, 0.5))
p_final
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Save figure to separate file
ggsave(
"/labs/kingsley/ambenj/myosin_dups/analysis/ecotypic_depth/gasAcuv5_C4masked_nochrY/subsample.eco.min4.phy_depth_plot.pdf",
plot = p_final,
scale = 1,
width = 5,
height = 8,
units = c("in"),
dpi = 300,
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion